In [1]:
from libtools import *
%matplotlib inline
In [2]:
# Load the facies training vectors (one row per logged depth sample)
# and show summary statistics for the numeric columns.
training_data = pd.read_csv('facies_vectors.csv')
training_data.describe()
Out[2]:
The nine discrete facies (classes of rocks) are:
| Facies | Label | Adjacent Facies | Description |
|---|---|---|---|
| 1 | SS | 2 | Nonmarine sandstone |
| 2 | CSiS | 1,3 | Nonmarine coarse siltstone |
| 3 | FSiS | 2 | Nonmarine fine siltstone |
| 4 | SiSh | 5 | Marine siltstone and shale |
| 5 | MS | 4,6 | Mudstone (limestone) |
| 6 | WS | 5,7 | Wackestone (limestone) |
| 7 | D | 6,8 | Dolomite |
| 8 | PS | 6,7,9 | Packstone-grainstone (limestone) |
| 9 | BS | 7,8 | Phylloid-algal bafflestone (limestone) |
In [3]:
# Display colours and short codes for the nine facies classes,
# in numeric facies order (1 = SS ... 9 = BS).
facies_colors = ['#F4D03F', '#F5B041', '#DC7633', '#6E2C00',
                 '#1B4F72', '#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',
                 'WS', 'D', 'PS', 'BS']
# facies_color_map: facies label -> hex colour, built by pairing the
# two lists position by position.
facies_color_map = dict(zip(facies_labels, facies_colors))
In [4]:
# Attach human-readable label columns (helpers come from libtools):
#   FaciesLabels  - short facies code (SS, CSiS, ...) derived per row
#   FaciesLabels2 - output of litologia() on the numeric Facies code;
#                   presumably a lithology grouping — confirm in libtools
training_data.loc[:,'FaciesLabels'] = training_data.apply(lambda row: label_facies(row, facies_labels), axis=1)
training_data['FaciesLabels2'] = training_data.Facies.apply(litologia)
In [5]:
training_data.head()  # preview the frame with the new FaciesLabels columns
Out[5]:
In [6]:
# Distinct well identifiers present in the data set (order of appearance).
well_names = pd.unique(training_data['Well Name'])
well_names
Out[6]:
In [7]:
# Plot the facies log display for every well contained in this DataFrame.
# (well_names was already computed in the previous cell; the duplicate
# recomputation that used to be here has been removed.)
for well in well_names:
    make_facies_log_plot(training_data[training_data['Well Name'] == well], facies_colors)
In [8]:
import seaborn as sns
In [9]:
# Sample count per facies label.
# seaborn >= 0.12 requires keyword arguments here; the old positional
# form sns.countplot(series) now raises a TypeError.
sns.countplot(x=training_data.FaciesLabels)
Out[9]:
In [10]:
# Facies counts split by the NM_M indicator.
# seaborn >= 0.12 requires keyword arguments (positional form removed).
sns.countplot(x=training_data.FaciesLabels, hue=training_data.NM_M)
Out[10]:
In [11]:
# Derive neutron (PhiN) and density (PhiD) porosity columns row by row
# using the phi_n / phi_d helpers from libtools.
training_data['PhiN'] = training_data.apply(phi_n, axis=1)
training_data['PhiD'] = training_data.apply(phi_d, axis=1)
training_data.head()
Out[11]:
In [12]:
# Neutron vs. density porosity cross-plot coloured by facies.
# seaborn >= 0.12 requires x/y as keyword arguments (positional removed).
sns.lmplot(x='PhiN', y='PhiD', data=training_data, fit_reg=False, hue='FaciesLabels')
plt.xlim(0, 100)
plt.ylim(0, 100)
x = [0, 100]
y = [0, 100]
plt.plot(x, y)  # 1:1 reference line; points below/right have PhiD > PhiN
Out[12]:
In [13]:
# Keep the samples where density porosity exceeds neutron porosity
# (the "HC" prefix suggests a hydrocarbon-indicating crossover).
crossover = training_data['PhiD'] > training_data['PhiN']
HC_training_data = training_data[crossover]
HC_training_data['FaciesLabels2'].value_counts()
Out[13]:
In [14]:
# Share of each lithology group among crossover samples, as a
# percentage of the full data set.
HC_training_data['FaciesLabels2'].value_counts() / len(training_data) * 100
Out[14]:
In [15]:
# Overall fraction of crossover samples, in percent.
# NOTE(review): this cell groups by FaciesLabels while the two previous
# cells used FaciesLabels2. The totals should agree (both sum over all
# HC rows) unless one label column has missing values — worth unifying.
auxiliar = HC_training_data['FaciesLabels'].value_counts()/training_data.shape[0] *100
print('%.2f in percent' % auxiliar.sum()) # total percentage of HC_training_data within training_data
In [16]:
# Group the samples by lithology label for the summary statistics below.
df_litology = training_data.groupby('FaciesLabels2')
In [17]:
# Mean porosities per lithology group. Column selection must use a list:
# the tuple form df_litology['PhiN','PhiD'] was deprecated in pandas 1.x
# and removed in pandas 2.0.
df_litology[['PhiN', 'PhiD']].mean()
Out[17]:
In [18]:
# Median porosities per lithology group (list selection: the tuple form
# was removed in pandas 2.0).
df_litology[['PhiN', 'PhiD']].median()
Out[18]:
In [19]:
# Maximum porosities per lithology group (list selection: the tuple form
# was removed in pandas 2.0).
df_litology[['PhiN', 'PhiD']].max()
Out[19]:
In [20]:
# Minimum porosities per lithology group (list selection: the tuple form
# was removed in pandas 2.0).
df_litology[['PhiN', 'PhiD']].min()
Out[20]:
In [21]:
# Joint distribution of PhiN vs. PhiD with a regression fit.
# seaborn >= 0.12 requires x/y as keyword arguments (positional removed).
sns.jointplot(x="PhiN", y="PhiD", data=training_data, kind="reg")
plt.xlim(-5, 100)
plt.ylim(-5, 100)
Out[21]:
In [22]:
# One figure per lithology group: PhiN and PhiD histograms side by side.
vector_facies = training_data.FaciesLabels2.unique()
for facies in vector_facies:
    subset = training_data[training_data.FaciesLabels2 == facies]
    plt.figure(figsize=(8, 4))
    plt.subplot(121)
    plt.title(facies)
    plt.xlabel('PhiN')
    plt.hist(subset.PhiN, bins=20)
    plt.subplot(122)
    plt.title(facies)
    plt.xlabel('PhiD')
    plt.hist(subset.PhiD, bins=20)
    plt.show()
In [23]:
# Count samples whose porosity falls outside the plausible 0-35 window.
too_high = (training_data['PhiN'] > 35) | (training_data['PhiD'] > 35)
negative = (training_data['PhiN'] < 0) | (training_data['PhiD'] < 0)
training_data[too_high | negative].Facies.count()
Out[23]:
In [24]:
# GR vs. deep-resistivity cross-plot, one figure per lithology group.
vector_facies = training_data.FaciesLabels2.unique()
for facies in vector_facies:
    subset = training_data[training_data.FaciesLabels2 == facies]
    plt.figure(figsize=(5, 4))
    plt.title(facies)
    plt.xlabel('GR')
    plt.ylabel('Rild')
    plt.scatter(subset.GR, subset.ILD_log10, marker='o')
In [25]:
# Mean gamma-ray reading per lithology group.
df_litology['GR'].mean()
Out[25]:
In [26]:
# Median gamma-ray reading per lithology group.
df_litology['GR'].median()
Out[26]:
In [27]:
# Minimum gamma-ray reading per lithology group.
df_litology['GR'].min()
Out[27]:
In [28]:
# Maximum gamma-ray reading per lithology group.
df_litology['GR'].max()
Out[28]:
In [29]:
# Number of samples with gamma-ray readings above 175 (potential outliers).
training_data.loc[training_data['GR'] > 175, 'GR'].count()
Out[29]:
In [30]:
training_data.head()  # current state of the frame after adding porosity columns
Out[30]:
In [31]:
# Sample counts per stratigraphic formation (horizontal bars).
sns.countplot(y=training_data.Formation)
Out[31]:
In [32]:
# Formation counts split by the NM_M indicator.
sns.countplot(y=training_data.Formation, hue=training_data.NM_M)
Out[32]:
In [33]:
# Distinct formation names, in order of first appearance.
formation_array = pd.unique(training_data['Formation'])
In [34]:
# Encode each formation name as an integer code (mapping shown in the
# table below) using the label_formation helper from libtools.
a = np.asarray(training_data.Formation)
training_data['Label_Form'] = label_formation(a,formation_array)
| Numerical Formation | Formation |
|---|---|
| 1 | A1 SH |
| 2 | A1 LM |
| 3 | B1 SH |
| 4 | B1 LM |
| 5 | B2 SH |
| 6 | B2 LM |
| 7 | B3 SH |
| 8 | B3 LM |
| 9 | B4 SH |
| 10 | B4 LM |
| 11 | B5 SH |
| 12 | B5 LM |
| 13 | C SH |
| 14 | C LM |
In [35]:
training_data.head()  # preview with the new Label_Form column
Out[35]:
In [36]:
# Two-group formation flag from libtools — presumably splits formations
# on their SH / LM suffix; confirm against label_two_groups_formation.
training_data['Label_Form_SH_LM'] = training_data.Formation.apply(label_two_groups_formation)
In [37]:
training_data.head()  # preview with the new Label_Form_SH_LM column
Out[37]:
In [38]:
# Relative-position feature from libtools.NM_M_TOPO — presumably RELPOS
# recomputed within NM_M intervals; confirm in libtools.
training_data['RELPOS_NM_M'] = NM_M_TOPO(training_data)
In [39]:
training_data.describe()  # sanity check of the new RELPOS_NM_M column
Out[39]:
In [40]:
# Relative-position feature from libtools.SH_LM_TOPO — presumably RELPOS
# recomputed within SH/LM intervals; confirm in libtools.
training_data['RELPOS_SH_LM'] = SH_LM_TOPO(training_data)
In [41]:
training_data.head()  # preview with both relative-position columns
Out[41]:
In [42]:
# Candidate feature matrix with rows containing missing values dropped.
# NOTE(review): this `features` frame is recomputed with a different
# column set in cell 45 below and is not used in between — it looks like
# a stale cell that could be removed.
features = training_data[['GR','ILD_log10','DeltaPHI','PHIND','PE','NM_M','RELPOS','RELPOS_NM_M','Label_Form']]
features = features.dropna()
In [43]:
# Hold out the SHANKLE well as a blind data set; everything else becomes
# the test split saved later. (A no-op `training_data = training_data`
# self-assignment that used to sit here has been removed.)
blind = training_data[training_data['Well Name'] == 'SHANKLE']
data_test = training_data[training_data['Well Name'] != 'SHANKLE']
# Replace missing values with a sentinel so downstream tools accept the frame.
# NOTE(review): the fill is applied only to training_data, not to blind or
# data_test — confirm this asymmetry is intended.
training_data = training_data.fillna(-99999)
In [44]:
training_data.describe()  # statistics now include the -99999 sentinel values
Out[44]:
In [45]:
# Export the training feature set.
# NOTE(review): training_data still contains the SHANKLE rows at this
# point, so the blind well also ends up in training.csv — confirm this is
# intended, as it would leak the blind well into training.
features = training_data[['Facies','GR','ILD_log10','DeltaPHI','PHIND','PE','RELPOS','Label_Form_SH_LM']]
features.to_csv('training.csv', index=False)
In [46]:
# Same feature columns for the blind (SHANKLE) well.
blind_features = blind[['Facies','GR','ILD_log10','DeltaPHI','PHIND','PE','RELPOS','Label_Form_SH_LM']]
In [47]:
# Export the blind-well features. NOTE(review): `blind` was sliced before
# the fillna step, so missing values may remain in this file — verify.
blind_features.to_csv('blind.csv',index=False)
In [48]:
# Feature subset for the non-SHANKLE wells, exported for later testing.
# NOTE(review): like `blind`, this frame was not filled with -99999, so
# NaNs (e.g. missing PE) may remain — verify downstream consumers cope.
data_test = data_test[['Facies','GR','ILD_log10','DeltaPHI','PHIND','PE','RELPOS','Label_Form_SH_LM']]
data_test.to_csv('data-test.csv', index=False)
In [49]:
training_data.describe()  # final summary of the filled training frame
Out[49]:
In [50]:
training_data.head()  # final preview of the filled training frame
Out[50]: